Assignment 1

Q1

In 2004, a high density of mosquitoes was found in:

- USA: the southern regions in and around Houston, Dallas, Memphis and Jackson are infested with Aedes albopictus.
- Mexico: along the coast of the Gulf of Mexico.
- Brazil: Recife, Fortaleza and the southern regions of Brazil, with the species Aedes aegypti.
- Uruguay.
- Kenya.
- India: along the west coast of southern India, with both species.
- South-East Asian countries such as Indonesia and Thailand, with both species, and Taiwan, which has the highest density of mosquitoes and is infested with Aedes albopictus.

In 2013, Brazil is heavily infested with mosquitoes of the species Aedes aegypti, while Taiwan is infested with Aedes albopictus, together with further infestations in Italy. This dot map suffers from occlusion of the data points, as most of Brazil is covered with markers. Illusory effects can also occur when zoomed in: because the observations appear to be equally spaced, the viewer may perceive illusory dots between them.

library(dplyr)
library(plotly)
library(akima)
library(sf)
library(stringi)
library(MASS)
# Load the mosquito occurrence table from CSV and preview its first rows.
mosq <- read.csv(
  file = "aegypti_albopictus.csv",
  header = TRUE,
  sep = ","
)
head(mosq)
##          VECTOR OCCURRENCE_ID SOURCE_TYPE LOCATION_TYPE POLYGON_ADMIN
## 1 Aedes aegypti             1   published         point          -999
## 2 Aedes aegypti             2   published         point          -999
## 3 Aedes aegypti             3   published         point          -999
## 4 Aedes aegypti             4   published         point          -999
## 5 Aedes aegypti             5   published         point          -999
## 6 Aedes aegypti             6   published         point          -999
##       Y     X YEAR COUNTRY COUNTRY_ID GAUL_AD0 STATUS
## 1 -3.22 40.07 1958   Kenya        KEN      133   <NA>
## 2 -4.27 15.30 1960   Congo        COG       59   <NA>
## 3 -4.27 15.30 1960   Congo        COG       59   <NA>
## 4 -3.22 40.07 1960   Kenya        KEN      133   <NA>
## 5 -3.04 40.14 1960   Kenya        KEN      133   <NA>
## 6  0.18 32.50 1960  Uganda        UGA      253   <NA>
# Mapbox access token read by plot_mapbox().
# NOTE(review): this credential is hard-coded in the report; prefer supplying
# MAPBOX_TOKEN from the environment (e.g. ~/.Renviron). The literal is now only
# a fallback and no longer overwrites a token that is already configured.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  Sys.setenv("MAPBOX_TOKEN" = "pk.eyJ1Ijoib2JodXRhcmEiLCJhIjoiY2ptYm9yczN1MDcwMzNwbnlqMTY0eDl2bCJ9.IIY_aDN7MAQ8U_sok4xDww")
}

# Dot map of the observations recorded in 2004, coloured by VECTOR (species).
# `mode` takes scatter flags ("markers", "lines", "text"); the previous value
# "scattermapbox" is the trace type, not a mode, so the markers only appeared
# through plotly's fallback. YEAR is compared as a number rather than a string.
p10 <- mosq %>%
  filter(YEAR == 2004) %>%
  plot_mapbox(
    lat = ~Y, lon = ~X,
    color = ~VECTOR, mode = "markers", hoverinfo = "name"
  )
p10
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Dot map of the observations recorded in 2013, coloured by VECTOR (species).
# `mode` takes scatter flags ("markers", "lines", "text"); the previous value
# "scattermapbox" is the trace type, not a mode. YEAR is compared as a number.
p11 <- mosq %>%
  filter(YEAR == 2013) %>%
  plot_mapbox(
    lat = ~Y, lon = ~X,
    color = ~VECTOR, mode = "markers", hoverinfo = "name"
  )
p11
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

Q2

There are many countries with very few mosquito observations, so large areas of the map share a single color: the color scale spans a very large range because Taiwan, a very small area that is marked in red, has by far the most observations and skews the scale. Hence, this map conveys little information.

# Count the observations per country: Z is the number of rows for each
# (COUNTRY, COUNTRY_ID) pair. ungroup() removes the grouping on COUNTRY that
# summarise() would otherwise leave behind, so later steps get a plain table.
mosqcountry <- mosq %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(Z = n()) %>%
  ungroup()
# Hover label for the maps: country name, a line break, then its count.
mosqcountry$hover <- paste(
  mosqcountry$COUNTRY, "<br>", "Number of mosquito's are ", mosqcountry$Z
)
head(mosqcountry)
## # A tibble: 6 x 4
## # Groups:   COUNTRY [6]
##   COUNTRY       COUNTRY_ID     Z hover                                    
##   <fct>         <fct>      <int> <chr>                                    
## 1 Afghanistan   AFG            1 Afghanistan <br> Number of mosquito's ar~
## 2 Albania       ALB           22 Albania <br> Number of mosquito's are  22
## 3 Algeria       DZA            1 Algeria <br> Number of mosquito's are  1 
## 4 American Sam~ ASM           12 American Samoa <br> Number of mosquito's~
## 5 Angola        AGO            1 Angola <br> Number of mosquito's are  1  
## 6 Anguilla      AIA           17 Anguilla <br> Number of mosquito's are  ~
# White, width-2 line specification (defined here; not used by the plot below).
l <- list(color = toRGB("white"), width = 2)

# Geo layout: equirectangular projection.
g <- list(projection = list(type = "equirectangular"))

# Two-stop colour ramp from yellow to red.
tored <- c("#FFFF00", "#FF0000")

# World choropleth of the per-country observation counts.
p2 <- mosqcountry %>%
  plot_geo() %>%
  add_trace(
    locations = ~COUNTRY_ID,
    z = ~Z,
    color = ~Z,
    colors = tored,
    text = ~hover
  ) %>%
  colorbar(title = "Number of Mosquitos") %>%
  layout(
    geo = g,
    title = "Number of mosquitos<br>(Hover for the number of mosquitos)"
  )
p2

Q3

Q3a

Taiwan has the highest number of mosquito observations over the course of the study, followed by Brazil and the USA.

# Geo layout: azimuthal equidistant projection.
g1 <- list(projection = list(type = "azimuthal equidistant"))

# Natural log of the per-country counts (Z comes from n(), so it is >= 1).
mosqcountry$logz <- log(mosqcountry$Z)

# Choropleth coloured by log(Z). A choropleth trace takes its fill colour from
# `z`, so the log values must be passed as `z`; with z = ~Z the map was still
# coloured by the raw counts. The hover text keeps showing the raw counts.
p3a <- plot_geo(mosqcountry) %>%
  add_trace(
    z = ~logz, color = ~logz, colors = tored,
    text = ~hover, locations = ~COUNTRY_ID
  ) %>%
  colorbar(title = "log(Number of Mosquitos)") %>%
  layout(
    title = "Number of mosquitos<br>(Hover for the number of mosquitos)",
    geo = g1
  )
p3a

Q3b

# Geo layout: conic equal-area projection.
g2 <- list(projection = list(type = "conic equal area"))

# Same log-scaled choropleth on the conic projection. The log values are
# passed as `z` because a choropleth trace takes its fill colour from `z`;
# with z = ~Z the map was still coloured by the raw counts.
p3b <- plot_geo(mosqcountry) %>%
  add_trace(
    z = ~logz, color = ~logz, colors = tored,
    text = ~hover, locations = ~COUNTRY_ID
  ) %>%
  colorbar(title = "log(Number of Mosquitos)") %>%
  layout(
    title = "Number of mosquitos<br>(Hover for the number of mosquitos)",
    geo = g2
  )
p3b

In the azimuthal equidistant projection, all points on the map are at proportionally correct distances from the center point.

The term conic projection refers to any projection in which meridians are mapped to equally spaced lines radiating out from the apex, and circles of latitude are mapped to circular arcs centered on the apex. The resulting conic map has low distortion in scale, shape, and area near its standard parallels (the latitudes along which the cone meets the globe).

Q4

Recife, Salvador and Sao Paulo, along the eastern coast of Brazil, are the areas most infested by mosquitoes. Yes, the discretization helped the analysis: the locations with a high number of occurrences are now identified, and occlusion of the data points is avoided.

# Keep only the rows with country code "BRA" and year 2013.
mosqbrazil <- mosq %>%
  filter(COUNTRY_ID == "BRA", YEAR == 2013)

# Discretise X and Y (used as longitude and latitude on the maps) into
# 100 intervals each.
mosqbrazil$X1 <- cut_interval(as.numeric(mosqbrazil$X), n = 100)
mosqbrazil$Y1 <- cut_interval(as.numeric(mosqbrazil$Y), n = 100)

head(mosqbrazil)
##          VECTOR OCCURRENCE_ID SOURCE_TYPE LOCATION_TYPE POLYGON_ADMIN
## 1 Aedes aegypti          5097 unpublished       polygon             2
## 2 Aedes aegypti          5098 unpublished       polygon             2
## 3 Aedes aegypti          5099 unpublished       polygon             2
## 4 Aedes aegypti          5100 unpublished       polygon             2
## 5 Aedes aegypti          5101 unpublished       polygon             2
## 6 Aedes aegypti          5102 unpublished       polygon             2
##        Y      X YEAR COUNTRY COUNTRY_ID GAUL_AD0 STATUS            X1
## 1 -32.21 -52.38 2013  Brazil        BRA       37   <NA> (-52.6,-52.2]
## 2 -31.56 -52.31 2013  Brazil        BRA       37   <NA> (-52.6,-52.2]
## 3 -30.80 -55.63 2013  Brazil        BRA       37   <NA> (-55.8,-55.4]
## 4 -30.37 -51.32 2013  Brazil        BRA       37   <NA>   (-51.4,-51]
## 5 -30.34 -54.37 2013  Brazil        BRA       37   <NA> (-54.6,-54.2]
## 6 -30.29 -56.16 2013  Brazil        BRA       37   <NA> (-56.2,-55.8]
##              Y1
## 1 [-32.2,-31.8]
## 2 (-31.8,-31.5]
## 3 (-31.1,-30.7]
## 4 (-30.7,-30.4]
## 5   (-30.4,-30]
## 6   (-30.4,-30]
# Summarise every occupied (X1, Y1) grid cell: mean longitude, mean latitude
# and the number of observations falling in the cell. ungroup() removes the
# grouping on X1 that summarise() would otherwise leave on the result.
mosqbragrouped <- mosqbrazil %>%
  group_by(X1, Y1) %>%
  summarise(xlong = mean(X), ylat = mean(Y), totalmosq = n()) %>%
  ungroup()
head(mosqbragrouped)
## # A tibble: 6 x 5
## # Groups:   X1 [5]
##   X1            Y1            xlong   ylat totalmosq
##   <fct>         <fct>         <dbl>  <dbl>     <int>
## 1 [-72.8,-72.4] (-8.21,-7.84] -72.8  -7.96         1
## 2 (-71.2,-70.8] (-8.94,-8.57] -70.8  -8.9          1
## 3 (-70.4,-70]   (-10.8,-10.4] -70.0 -10.7          1
## 4 (-70,-69.6]   (-4.14,-3.77] -69.7  -4.03         1
## 5 (-69.6,-69.2] (-10.8,-10.4] -69.2 -10.7          1
## 6 (-69.6,-69.2] (-10.1,-9.68] -69.4  -9.77         1
# Mapbox access token read by plot_mapbox().
# NOTE(review): hard-coded credential; prefer supplying MAPBOX_TOKEN from the
# environment. The literal is only a fallback when the variable is unset.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  Sys.setenv("MAPBOX_TOKEN" = "pk.eyJ1Ijoib2JodXRhcmEiLCJhIjoiY2ptYm9yczN1MDcwMzNwbnlqMTY0eDl2bCJ9.IIY_aDN7MAQ8U_sok4xDww")
}

# One marker per grid cell, coloured by the number of observations in it.
# `hoverinfo` is a flag list ("text", "name", ...), not a data mapping: the
# count is supplied through `text` and selected with hoverinfo = "text".
# `mode` is "markers"; "scattermapbox" is the trace type, not a mode.
p4 <- mosqbragrouped %>%
  plot_mapbox(
    lat = ~ylat, lon = ~xlong, color = ~totalmosq,
    mode = "markers", text = ~totalmosq, hoverinfo = "text"
  ) %>%
  layout(
    title = "Most infected by mosquitos",
    font = list(color = "white"),
    plot_bgcolor = "#191A1A", paper_bgcolor = "#191A1A",
    mapbox = list(style = "dark"),
    legend = list(orientation = "h", font = list(size = 8)),
    margin = list(l = 25, r = 25, b = 25, t = 25, pad = 2)
  )
p4

Assignment 2

Q1

Reading and preprocessing the data on Swedish household income.

# Q2.1
# Read the household income table, skipping the first two lines of the file
# and naming the three columns explicitly.
data2 <- read.csv(
  "SwedishHousehold.csv",
  skip = 2,
  col.names = c("Region", "Age", "Income")
)
head(data2)
##                Region         Age Income
## 1 01 Stockholm county 18-29 years  385.4
## 2 01 Stockholm county 30-49 years  659.5
## 3 01 Stockholm county 50-64 years  683.3
## 4   03 Uppsala county 18-29 years  300.9
## 5   03 Uppsala county 30-49 years  542.7
## 6   03 Uppsala county 50-64 years  580.4
#' Extract the region name from a coded region label.
#'
#' Splits the label on single spaces and keeps the second token, e.g.
#' "01 Stockholm county" gives "Stockholm". Only one token is kept, so a
#' multi-word region name is cut down to its first word.
#'
#' @param region A single region label (character or length-one factor).
#' @return The second space-separated token as a character string, or
#'   `NA_character_` when the label has fewer than two tokens.
my_strip <- function(region) {
  tokens <- strsplit(as.character(region), " ", fixed = TRUE)[[1]]
  # Return the value visibly; the function used to end in an assignment, so
  # its result was only returned invisibly.
  tokens[2]
}
# Replace every coded label with its region name. vapply() yields a plain
# character vector in one step (USE.NAMES = FALSE keeps it unnamed), instead
# of a list column that has to be coerced afterwards.
data2$Region <- vapply(data2$Region, my_strip, character(1), USE.NAMES = FALSE)
# Relabel the three age brackets. factor() works whether read.csv() produced a
# factor or a character column (the default changed in R 4.0), whereas
# assigning levels() only relabels an existing factor.
data2$Age <- factor(data2$Age, labels = c("Young", "Adult", "Senior"))
# Wide layout: one row per region, one income column per age group.
data2_rshaped <- reshape(
  data2,
  idvar = "Region", timevar = "Age", direction = "wide"
)
names(data2_rshaped) <- c("Region", "Youth", "Adult", "Senior")
head(data2_rshaped)
##          Region Youth Adult Senior
## 1     Stockholm 385.4 659.5  683.3
## 4       Uppsala 300.9 542.7  580.4
## 7  Södermanland 317.5 489.4  507.6
## 10 Östergötland 290.3 502.7  532.8
## 13    Jönköping 330.8 518.8  556.7
## 16    Kronoberg 307.3 503.2  530.3

Question 2

This plot shows that income is highly dependent on the age of a person. Seniors have a very high salary range, which is quite similar to that of adults, but the salary range is very low for the youth.

# Q2.2
# One violin of Income per age group, each with its box plot shown inside.
data2 %>%
  plot_ly(
    x = ~factor(Age),
    y = ~Income,
    split = ~factor(Age),
    type = "violin",
    box = list(visible = TRUE)
  ) %>%
  layout(title = "Violin plot for Each age group")

Question 3

Yes, I think linear regression would be a suitable model for this dependence. The range for adults and seniors is quite similar, and it is lower for the youth. I think linear regression would give a good fit to the data.

# Q2.3
# Interpolate Adult income over the (Senior, Youth) plane. The columns are
# passed explicitly instead of via attach()/detach(): an attached data frame
# stays on the search path if interp() fails before detach() is reached.
s <- interp(
  data2_rshaped$Senior, data2_rshaped$Youth, data2_rshaped$Adult,
  duplicate = "mean"
)
# interp() returns z indexed as [x, y], while a surface trace reads the rows
# of z along y and the columns along x, so z is transposed to keep the Senior
# and Youth axes attached to the right values.
plot_ly(x = ~s$x, y = ~s$y, z = ~t(s$z), type = "surface") %>%
  layout(
    title = "Surface Plot",
    scene = list(
      xaxis = list(title = "Senior"),
      yaxis = list(title = "Youth"),
      zaxis = list(title = "Adult")
    )
  )

Question 4

The previous plots had no information about the dependence of income on region. This plot gives good information on how income also depends on the region you live in. The income is usually higher in the southern part of Sweden than in the northern part. According to this plot, the highest-paying jobs are in Stockholm.

# Q2.4
# Region geometries; NAME_1 holds the region names used for matching below.
rds <- readRDS("gadm36_SWE_1_sf.rds")
# Align two region names in the income table with the spelling expected in
# rds$NAME_1: my_strip() keeps only the first word of the two-word region
# name, and the second region is looked up without its diacritic. The names
# are written with \u escapes so they do not depend on this file's encoding;
# the previous literals had their non-ASCII letters replaced by "?", matched
# no existing row and added empty rows instead.
data2_rshaped$Region[data2_rshaped$Region == "V\u00e4stra"] <- "V\u00e4stra G\u00f6taland"
data2_rshaped$Region[data2_rshaped$Region == "\u00d6rebro"] <- "Orebro"
rownames(data2_rshaped) <- data2_rshaped$Region
# Exact-match each map region to its row of the income table (NA if absent).
region_idx <- match(rds$NAME_1, data2_rshaped$Region)
rds$Youth <- data2_rshaped$Youth[region_idx]
rds$Adult <- data2_rshaped$Adult[region_idx]
rds$Senior <- data2_rshaped$Senior[region_idx]
# One choropleth per age group, each region filled by its income value.
p_youth <- plot_ly() %>%
  add_sf(data = rds, split = ~NAME_1, color = ~Youth, showlegend = FALSE, alpha = 1) %>%
  layout(title = "Choropleth map showing income for youth")
p_adults <- plot_ly() %>%
  add_sf(data = rds, split = ~NAME_1, color = ~Adult, showlegend = FALSE, alpha = 1) %>%
  layout(title = "Choropleth map showing income for adults")
p_youth
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
# Display the choropleth stored in p_adults.
p_adults
## Warning: line.color doesn't (yet) support data arrays

## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed

Question 5

This is the same Choropleth map we had in the previous question with the red marker showing Linkoping City.

# Q2.5
# Read the location table (longitude, latitude and desc columns are used).
linkoping <- read.csv("Linkoping.csv")
# Youth-income choropleth with the location drawn on top as a marker.
p_youth_withLoc <- plot_ly() %>%
  add_sf(
    data = rds,
    split = ~NAME_1,
    color = ~Youth,
    showlegend = FALSE,
    alpha = 1
  ) %>%
  add_markers(
    data = linkoping,
    x = ~longitude,
    y = ~latitude,
    text = ~desc
  ) %>%
  layout(title = "Choropleth map showing income for youth")
p_youth_withLoc
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed
## Warning: line.color doesn't (yet) support data arrays
## Warning: Only one fillcolor per trace allowed

Appendix

library(dplyr)
library(plotly)
library(akima)
library(sf)
library(stringi)
library(MASS)
# Load the mosquito occurrence table from CSV and preview its first rows.
mosq <- read.csv("aegypti_albopictus.csv", header = TRUE, sep = ",")
head(mosq)
# Mapbox access token read by plot_mapbox().
# NOTE(review): hard-coded credential; prefer supplying MAPBOX_TOKEN from the
# environment. The literal is only a fallback when the variable is unset.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  Sys.setenv("MAPBOX_TOKEN" = "pk.eyJ1Ijoib2JodXRhcmEiLCJhIjoiY2ptYm9yczN1MDcwMzNwbnlqMTY0eDl2bCJ9.IIY_aDN7MAQ8U_sok4xDww")
}
# Dot maps of the observations for 2004 and 2013, coloured by VECTOR.
# `mode` takes scatter flags ("markers", "lines", "text"); the previous value
# "scattermapbox" is the trace type, not a mode. YEAR is compared as a number.
p10 <- mosq %>%
  filter(YEAR == 2004) %>%
  plot_mapbox(
    lat = ~Y, lon = ~X,
    color = ~VECTOR, mode = "markers", hoverinfo = "name"
  )
p10

p11 <- mosq %>%
  filter(YEAR == 2013) %>%
  plot_mapbox(
    lat = ~Y, lon = ~X,
    color = ~VECTOR, mode = "markers", hoverinfo = "name"
  )
p11
# Count the observations per country: Z is the number of rows for each
# (COUNTRY, COUNTRY_ID) pair. ungroup() removes the grouping on COUNTRY that
# summarise() would otherwise leave behind.
mosqcountry <- mosq %>%
  group_by(COUNTRY, COUNTRY_ID) %>%
  summarise(Z = n()) %>%
  ungroup()
# Hover label for the maps: country name, a line break, then its count.
mosqcountry$hover <- paste(
  mosqcountry$COUNTRY, "<br>", "Number of mosquito's are ", mosqcountry$Z
)
head(mosqcountry)

# White, width-2 line specification (defined here; not used by the plot below).
l <- list(color = toRGB("white"), width = 2)
# Geo layout: equirectangular projection.
g <- list(projection = list(type = "equirectangular"))
# Two-stop colour ramp from yellow to red.
tored <- c("#FFFF00", "#FF0000")

# World choropleth of the per-country observation counts.
p2 <- plot_geo(mosqcountry) %>%
  add_trace(
    z = ~Z, color = ~Z, colors = tored,
    text = ~hover, locations = ~COUNTRY_ID
  ) %>%
  colorbar(title = "Number of Mosquitos") %>%
  layout(
    title = "Number of mosquitos<br>(Hover for the number of mosquitos)",
    geo = g
  )
p2
# Geo layout: azimuthal equidistant projection.
g1 <- list(projection = list(type = "azimuthal equidistant"))
# Natural log of the per-country counts (Z comes from n(), so it is >= 1).
mosqcountry$logz <- log(mosqcountry$Z)

# Choropleth coloured by log(Z). A choropleth trace takes its fill colour from
# `z`, so the log values must be passed as `z`; with z = ~Z the map was still
# coloured by the raw counts. The hover text keeps showing the raw counts.
p3a <- plot_geo(mosqcountry) %>%
  add_trace(
    z = ~logz, color = ~logz, colors = tored,
    text = ~hover, locations = ~COUNTRY_ID
  ) %>%
  colorbar(title = "log(Number of Mosquitos)") %>%
  layout(
    title = "Number of mosquitos<br>(Hover for the number of mosquitos)",
    geo = g1
  )
p3a
# Geo layout: conic equal-area projection.
g2 <- list(projection = list(type = "conic equal area"))

# Same log-scaled choropleth on the conic projection.
p3b <- plot_geo(mosqcountry) %>%
  add_trace(
    z = ~logz, color = ~logz, colors = tored,
    text = ~hover, locations = ~COUNTRY_ID
  ) %>%
  colorbar(title = "log(Number of Mosquitos)") %>%
  layout(
    title = "Number of mosquitos<br>(Hover for the number of mosquitos)",
    geo = g2
  )
p3b
# Keep only the rows with country code "BRA" and year 2013.
mosqbrazil <- filter(mosq, COUNTRY_ID == "BRA", YEAR == 2013)
# Discretise X and Y (used as longitude and latitude on the maps) into
# 100 intervals each.
mosqbrazil$X1 <- cut_interval(as.numeric(mosqbrazil$X), n = 100)
mosqbrazil$Y1 <- cut_interval(as.numeric(mosqbrazil$Y), n = 100)

head(mosqbrazil)
# Summarise every occupied (X1, Y1) grid cell: mean longitude, mean latitude
# and the number of observations in the cell. ungroup() removes the grouping
# on X1 that summarise() would otherwise leave on the result.
mosqbragrouped <- mosqbrazil %>%
  group_by(X1, Y1) %>%
  summarise(xlong = mean(X), ylat = mean(Y), totalmosq = n()) %>%
  ungroup()
head(mosqbragrouped)

# Mapbox access token read by plot_mapbox().
# NOTE(review): hard-coded credential; prefer supplying MAPBOX_TOKEN from the
# environment. The literal is only a fallback when the variable is unset.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  Sys.setenv("MAPBOX_TOKEN" = "pk.eyJ1Ijoib2JodXRhcmEiLCJhIjoiY2ptYm9yczN1MDcwMzNwbnlqMTY0eDl2bCJ9.IIY_aDN7MAQ8U_sok4xDww")
}
# One marker per grid cell, coloured by the number of observations in it.
# `hoverinfo` is a flag list, not a data mapping: the count is supplied through
# `text` and selected with hoverinfo = "text". `mode` is "markers";
# "scattermapbox" is the trace type, not a mode.
p4 <- mosqbragrouped %>%
  plot_mapbox(
    lat = ~ylat, lon = ~xlong, color = ~totalmosq,
    mode = "markers", text = ~totalmosq, hoverinfo = "text"
  ) %>%
  layout(
    title = "Most infected by mosquitos",
    font = list(color = "white"),
    plot_bgcolor = "#191A1A", paper_bgcolor = "#191A1A",
    mapbox = list(style = "dark"),
    legend = list(orientation = "h", font = list(size = 8)),
    margin = list(l = 25, r = 25, b = 25, t = 25, pad = 2)
  )
p4
# Q2.1
# Read the household income table, skipping the first two lines of the file
# and naming the three columns explicitly.
data2 <- read.csv(
  "SwedishHousehold.csv",
  skip = 2,
  col.names = c("Region", "Age", "Income")
)
head(data2)

#' Extract the region name from a coded region label.
#'
#' Splits the label on single spaces and keeps the second token, e.g.
#' "01 Stockholm county" gives "Stockholm". Only one token is kept, so a
#' multi-word region name is cut down to its first word.
#'
#' @param region A single region label (character or length-one factor).
#' @return The second space-separated token as a character string, or
#'   `NA_character_` when the label has fewer than two tokens.
my_strip <- function(region) {
  tokens <- strsplit(as.character(region), " ", fixed = TRUE)[[1]]
  # Return the value visibly rather than as the value of an assignment.
  tokens[2]
}
# vapply() yields a plain, unnamed character vector in one step.
data2$Region <- vapply(data2$Region, my_strip, character(1), USE.NAMES = FALSE)
# factor() relabels the age brackets whether read.csv() produced a factor or
# a character column (the default changed in R 4.0); assigning levels() only
# relabels an existing factor.
data2$Age <- factor(data2$Age, labels = c("Young", "Adult", "Senior"))
# Wide layout: one row per region, one income column per age group.
data2_rshaped <- reshape(
  data2,
  idvar = "Region", timevar = "Age", direction = "wide"
)
names(data2_rshaped) <- c("Region", "Youth", "Adult", "Senior")
head(data2_rshaped)
# Q2.2
# One violin of Income per age group, each with its box plot shown inside.
data2 %>%
  plot_ly(
    x = ~factor(Age),
    y = ~Income,
    split = ~factor(Age),
    type = "violin",
    box = list(visible = TRUE)
  ) %>%
  layout(title = "Violin plot for Each age group")
# Q2.3
# Interpolate Adult income over the (Senior, Youth) plane. The columns are
# passed explicitly instead of via attach()/detach(): an attached data frame
# stays on the search path if interp() fails before detach() is reached.
s <- interp(
  data2_rshaped$Senior, data2_rshaped$Youth, data2_rshaped$Adult,
  duplicate = "mean"
)
# interp() returns z indexed as [x, y], while a surface trace reads the rows
# of z along y and the columns along x, so z is transposed to keep the Senior
# and Youth axes attached to the right values.
plot_ly(x = ~s$x, y = ~s$y, z = ~t(s$z), type = "surface") %>%
  layout(
    title = "Surface Plot",
    scene = list(
      xaxis = list(title = "Senior"),
      yaxis = list(title = "Youth"),
      zaxis = list(title = "Adult")
    )
  )
# Q2.4
# Region geometries; NAME_1 holds the region names used for matching below.
rds <- readRDS("gadm36_SWE_1_sf.rds")
# Align two region names in the income table with the spelling expected in
# rds$NAME_1: my_strip() keeps only the first word of the two-word region
# name, and the second region is looked up without its diacritic. The names
# are written with \u escapes so they do not depend on this file's encoding;
# the previous literals had their non-ASCII letters replaced by "?", matched
# no existing row and added empty rows instead.
data2_rshaped$Region[data2_rshaped$Region == "V\u00e4stra"] <- "V\u00e4stra G\u00f6taland"
data2_rshaped$Region[data2_rshaped$Region == "\u00d6rebro"] <- "Orebro"
rownames(data2_rshaped) <- data2_rshaped$Region
# Exact-match each map region to its row of the income table (NA if absent).
region_idx <- match(rds$NAME_1, data2_rshaped$Region)
rds$Youth <- data2_rshaped$Youth[region_idx]
rds$Adult <- data2_rshaped$Adult[region_idx]
rds$Senior <- data2_rshaped$Senior[region_idx]
# One choropleth per age group, each region filled by its income value.
p_youth <- plot_ly() %>%
  add_sf(data = rds, split = ~NAME_1, color = ~Youth, showlegend = FALSE, alpha = 1) %>%
  layout(title = "Choropleth map showing income for youth")
p_adults <- plot_ly() %>%
  add_sf(data = rds, split = ~NAME_1, color = ~Adult, showlegend = FALSE, alpha = 1) %>%
  layout(title = "Choropleth map showing income for adults")
p_youth
p_adults
# Q2.5
# Read the location table (longitude, latitude and desc columns are used).
linkoping <- read.csv("Linkoping.csv")
# Youth-income choropleth with the location drawn on top as a marker.
p_youth_withLoc <- plot_ly() %>%
  add_sf(
    data = rds,
    split = ~NAME_1,
    color = ~Youth,
    showlegend = FALSE,
    alpha = 1
  ) %>%
  add_markers(
    data = linkoping,
    x = ~longitude,
    y = ~latitude,
    text = ~desc
  ) %>%
  layout(title = "Choropleth map showing income for youth")
p_youth_withLoc